from collections import defaultdict

import pandas as pd

# Load the evaluation sheet (tab-separated). The code below reads its
# "Rating" column.
df = pd.read_csv("evaluation.tsv", sep="\t")

# All ratings as a plain list, in file order.
results = list(df["Rating"])

# r maps each distinct rating value to the number of rows that carry it.
r = defaultdict(int)
for score in results:
	r[score] = r[score] + 1
print(r)

# average rating
total = sum(results)
print(total / len(results))


import numpy as np
import matplotlib.pyplot as plt 
 
  
# creating the dataset
# data maps each rubric label to the count stored in r for that rating,
# ordered from the highest rating (4) down to the lowest (1)
rubric = {1: "Poor", 2: "Average", 3: "Good", 4: "Excellent"}
data = {rubric[score]: r[score] for score in range(4, 0, -1)}

# bar labels and bar heights, in matching order
courses = list(data)
values = list(data.values())

fig = plt.figure(figsize=(4, 4))

# creating the bar plot
plt.bar(courses, values, width=0.4, color='c')

plt.ylabel("Number of Images")
plt.tight_layout()
# NOTE: absolute, machine-specific output location.
plt.savefig('/Users/yuwang/Desktop/Gemini in Political Science/source/rating.pdf')
#plt.show()

# Build a vocabulary of object names from the "Objects" column and report
# frequency statistics. vocab maps a lower-cased object name to the number
# of rows whose parsed cell lists that name.
objects = list(df["Objects"])
vocab = defaultdict(int)

print("Errors")
for i, obj in enumerate(objects):
	if not isinstance(obj, str):
		# Non-string cell (presumably NaN from an empty field): nothing to parse.
		continue
	if obj == "Error":
		# Rows marked "Error" are listed by image name and then skipped.
		print(df.iloc[i]["Image"])
		continue
	# SECURITY: eval() executes arbitrary expressions found in the TSV. Only
	# run this on a trusted file; if every cell is a plain dict literal,
	# ast.literal_eval would be the safe replacement.
	obj = eval(obj)
	for name in obj:
		# Each name counts once per row; the value stored under the name
		# is deliberately ignored.
		vocab[name.lower()] += 1
print("Errors done")

# Computed once up front instead of on every loop iteration; default=0 keeps
# the report from raising ValueError when no row could be parsed.
max_freq = max(vocab.values(), default=0)
min_freq = min(vocab.values(), default=0)

print("Number of words", len(vocab))
print("Max frequency", max_freq)

for k, v in vocab.items():
	if v == 1:
		# names that occur in exactly one row
		print(k, v)
	if v == max_freq:
		# the most frequent name(s)
		print("=====")
		print(k, v)
	if k == "blanket" or k == "toy":
		# the two names singled out by the sections below
		print("****")
		print(k, v)
print("Min frequency", min_freq)

# Mean rating per outlet, where the outlet is the part of the "Image" value
# before its first underscore. One rounded mean is printed per outlet, in
# alphabetical order of outlet name.
avg_ratings = defaultdict(int)  # outlet -> running sum of ratings
counts = defaultdict(int)  # outlet -> number of rows seen
for pos in range(len(df)):
	row = df.iloc[pos]
	outlet = row["Image"].split("_")[0]
	avg_ratings[outlet] += row["Rating"]
	counts[outlet] += 1
print("Avg rating:")
outlets = sorted(avg_ratings)
for outlet in outlets:
	mean = avg_ratings[outlet] / counts[outlet]
	print(round(mean, 1))


## images with toy
# Print the image name of every row whose parsed "Objects" cell contains
# one of the toy labels. Non-string cells and "Error" rows are skipped.
for idx, raw in enumerate(objects):
	if isinstance(raw, str) and raw != "Error":
		parsed = eval(raw)
		if any(label in parsed for label in ("toy", "Toy", "toy animal")):
			print(df.iloc[idx]["Image"])

# axios_0000_1.jpg Y
# business-insider_0019_7.jpeg Y
# cnn_0004_15.jpg
# fox-news_0001_1.jpg
# new-york-magazine_0013_1.jpg
# the-verge_0001_3.jpg
# vice-news_0014_2.jpeg
# vice-news_0017_6.jpg


## images with blanket
# Print the image name of every row whose parsed "Objects" cell contains
# "blanket" or "Blanket", then the total number of such rows.
print("****" * 5)
print("Blankets:")
c = 0  # rows matched so far
for idx, raw in enumerate(objects):
	if isinstance(raw, str) and raw != "Error":
		parsed = eval(raw)
		if any(label in parsed for label in ("blanket", "Blanket")):
			c += 1
			print(df.iloc[idx]["Image"])
print("Blanket images", c)

# al-jazeera-english_0011_9.jpg Y
# al-jazeera-english_0019_3.jpg Y
# associated-press_0007_7.jpg Y [candidate for illustration]
# bloomberg_0000_1.jpg Y
# business-insider_0019_23.jpeg Y
# cnbc_0008_2.jpeg Y
# cnn_0004_15.jpg Y
# cnn_0014_3.jpg Y [image not clear]
# fox-news_0001_1.jpg Y
# fox-news_0015_5.png Y
# nbc-news_0005_2.jpg Y
# newsweek_0011_4.jpg Y
# the-wall-street-journal_0003_2.jpeg Y
# the-wall-street-journal_0006_9.jpg Y
# the-wall-street-journal_0010_3.jpg Y
# the-wall-street-journal_0017_15.jpg Y
# the-washington-times_0012_10.jpg Y
# usa-today_0002_1.jpg Y
# usa-today_0009_2.jpeg Y
# vice-news_0005_1.png Y
# vice-news_0014_2.jpeg Y
# vice-news_0015_2.jpg Y
# vice-news_0017_1.jpg Y
# vice-news_0017_6.jpg Y
# Blanket images 24